In [10]:
# Logger
import logging
logging.basicConfig(level=logging.INFO)

# Import local paths
import sys, os
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('../../../../openai-envs'))

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList

# Load environments
import gym
import gym_corridor


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

Corridor

Actions:

MOVE_LEFT = 0
MOVE_RIGHT = 1

In [11]:
class CorridorAdapter(EnvironmentAdapter):
    """Environment adapter used by the ACS2 configuration for the corridor runs."""

    @staticmethod
    def to_genotype(phenotype):
        """Wrap the raw observation in a one-element tuple.

        :param phenotype: observation as handed over by the environment
        :return: ``(phenotype,)`` - a tuple whose single item is the
            unchanged observation
        """
        return (phenotype,)

In [12]:
# ACS2 settings shared by the exploration and exploitation runs.
cfg = Configuration(
    # Problem shape: the adapter yields one-element perceptions and the
    # corridor offers two actions (MOVE_LEFT = 0, MOVE_RIGHT = 1).
    classifier_length=1,
    number_of_possible_actions=2,
    environment_adapter=CorridorAdapter,
    # Learning parameters - see the PyALCS Configuration docs for the exact
    # meaning of each value (TODO confirm against the installed version).
    epsilon=1.0,
    beta=0.05,
    gamma=0.95,
    theta_exp=50,
    # Genetic-generalisation settings.
    do_ga=True,
    theta_ga=50,
    mu=0.03,
    u_max=1,
    # Presumably a metrics record is collected every 20th trial - confirm.
    metrics_trial_frequency=20,
)

In [13]:
def print_simple_stats(population, metrics):
    """Print the population size and the average number of steps per trial.

    :param population: sized collection of classifiers; only ``len()`` is used
    :param metrics: iterable of mappings, each holding a ``'steps_in_trial'``
        entry
    :return: None - the summary is written to stdout

    When ``metrics`` is empty the average is reported as ``nan`` instead of
    raising ``ZeroDivisionError``.
    """
    pop_size = len(population)

    # Steps taken in every recorded trial
    steps = [m['steps_in_trial'] for m in metrics]

    # Guard against an empty metrics collection (no trial was recorded)
    avg_steps = sum(steps) / len(steps) if steps else float('nan')

    print(f"Population of {pop_size}, avg steps {avg_steps}")

Corridor 20

Exploration


In [14]:
# Build the environment registered under the id 'corridor-20-v0'
# (presumably provided by the gym_corridor import - confirm).
corridor = gym.make('corridor-20-v0')

In [15]:
%%time
# Start from a fresh agent (no initial population) and run the explore phase.
# The second argument is presumably the number of trials - confirm.
agent = ACS2(cfg)
population, metrics = agent.explore(corridor, 1000)


INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 101, 'reward': 1000}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 200, 'reward': 0}
CPU times: user 31.8 s, sys: 157 ms, total: 32 s
Wall time: 32.5 s

In [16]:
# Summarise the exploration run: population size and mean steps per recorded trial.
print_simple_stats(population, metrics)


Population of 38, avg steps 160.28

In [17]:
# Five classifiers with the highest fitness after exploration
sorted(population, key=lambda cl: cl.fitness, reverse=True)[:5]


Out[17]:
[19 1 20               (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 262 tga: 157928 talp: 157928 tav: 1.14e+03 num: 20,
 18 1 19               (empty)               q: 1.0   r: 950.0  ir: 0.0    f: 950.0  exp: 475 tga: 157915 talp: 157928 tav: 6.32e+02 num: 20,
 17 1 18               (empty)               q: 1.0   r: 902.5  ir: 0.0    f: 902.5  exp: 640 tga: 157914 talp: 157927 tav: 4.45e+02 num: 20,
 19 0 18               (empty)               q: 1.0   r: 902.4  ir: 0.0    f: 902.4  exp: 264 tga: 157916 talp: 157917 tav: 1.1e+03 num: 20,
 16 1 17               (empty)               q: 1.0   r: 857.4  ir: 0.0    f: 857.4  exp: 752 tga: 157925 talp: 157926 tav: 4.57e+02 num: 20]

Exploitation


In [19]:
%%time
# New agent initialised with the population returned by the explore phase,
# then run in exploit mode (second argument presumably the trial count - confirm).
agent = ACS2(cfg, population)
pop_exploit, metric_exploit = agent.exploit(corridor, 100)


INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 4, 'reward': 1000}
CPU times: user 106 ms, sys: 2.86 ms, total: 108 ms
Wall time: 114 ms

In [20]:
# Summarise the exploitation run: population size and mean steps per recorded trial.
print_simple_stats(pop_exploit, metric_exploit)


Population of 38, avg steps 11.8

In [21]:
# Five classifiers with the highest fitness after exploitation
sorted(pop_exploit, key=lambda cl: cl.fitness, reverse=True)[:5]


Out[21]:
[19 1 20               (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 262 tga: 157928 talp: 157928 tav: 1.14e+03 num: 20,
 18 1 19               (empty)               q: 1.0   r: 950.0  ir: 0.0    f: 950.0  exp: 475 tga: 157915 talp: 157928 tav: 6.32e+02 num: 20,
 17 1 18               (empty)               q: 1.0   r: 902.5  ir: 0.0    f: 902.5  exp: 640 tga: 157914 talp: 157927 tav: 4.45e+02 num: 20,
 19 0 18               (empty)               q: 1.0   r: 902.4  ir: 0.0    f: 902.4  exp: 264 tga: 157916 talp: 157917 tav: 1.1e+03 num: 20,
 16 1 17               (empty)               q: 1.0   r: 857.4  ir: 0.0    f: 857.4  exp: 752 tga: 157925 talp: 157926 tav: 4.57e+02 num: 20]